import os
import time
from datetime import date

import pandas as pd
from scholarly import scholarly

# Reference point for the elapsed-time reports printed during the run,
# plus the date of the run.
start_time = time.time()
today = date.today()

# Exact-phrase searches (the double quotes are part of the query string):
# each phrase on its own, then both phrases joined with AND.
_KNOWLEDGE_CUTOFF_PHRASE = '"as of my last knowledge update"'
_NO_REALTIME_DATA_PHRASE = '"I don\'t have access to real-time data"'

queries = [
    _KNOWLEDGE_CUTOFF_PHRASE,
    _NO_REALTIME_DATA_PHRASE,
    f'{_KNOWLEDGE_CUTOFF_PHRASE} AND {_NO_REALTIME_DATA_PHRASE}',
]

# Upper bound on the number of results fetched per query. Run the search on
# Google Scholar first to see how many papers it returns and adjust this.
MAX_PAPERS_PER_QUERY = 250

# The pandas writers below do not create missing directories; make sure the
# output folder exists up front so a long scrape is not lost at save time.
os.makedirs('data', exist_ok=True)

for idx, query in enumerate(queries):
    # Show the query without its outermost quote characters.
    print(query[1:-1])
    search_query = scholarly.search_pubs(query)

    # Parallel lists: exactly one entry is appended to each per fetched paper,
    # so they always have the same length when the DataFrame is built.
    papers_data = []  # the 'bib' record of each paper
    urls = []         # best available link for each paper
    flag = []         # 0 = 'pub_url' present, 1 = fell back to 'eprint_url'/'na'
                      # NOTE(review): flag is collected but not written to the
                      # output files - confirm whether it should be a column.

    for i in range(MAX_PAPERS_PER_QUERY):
        # Guard only the iterator advance, so that a missing key further down
        # cannot be mistaken for "no more results" or vice versa.
        try:
            paper = next(search_query)
        except StopIteration:
            # no more papers for this query
            break

        # An empty record keeps the row aligned with urls if 'bib' is absent.
        papers_data.append(paper['bib'] if 'bib' in paper else {})

        # Prefer the publication URL, then the e-print URL, then 'na'.
        if 'pub_url' in paper:
            urls.append(paper['pub_url'])
            flag.append(0)
        else:
            urls.append(paper['eprint_url'] if 'eprint_url' in paper else 'na')
            flag.append(1)

        # Throttle after every fetched paper, including those that needed the
        # URL fallback, so the request pacing is uniform.
        time.sleep(1)

        # print the progress time
        elapsed_time = time.time() - start_time
        print(f"Fetched paper {i + 1}, elapsed time: {elapsed_time:.2f} seconds")

    # One row per paper: bib fields plus the chosen URL and the originating query.
    df = pd.DataFrame(papers_data)
    df['pub_url'] = urls
    df['query'] = query

    # Save the same table as CSV and as Excel, one pair of files per query index.
    df.to_csv('data/scholarly_papers_{}.csv'.format(idx), index=False)
    df.to_excel('data/scholarly_papers_{}.xlsx'.format(idx), engine='xlsxwriter', index=False)

    # Cumulative time since the script started (not per-query time).
    total_time = time.time() - start_time
    print(f"Total elapsed time: {total_time:.2f} seconds")
